
import logging
from init import PATHS
LOGGER = logging.getLogger(__name__)
import pandas as pd



def main():
	firm_level_sales = import_and_clean_regional_sales_data()
	firm_level_sales.to_csv(PATHS.dropbox / 'Data_outputted/A_AutoIndustry/sales_firm_year_type_country_level.csv', index=False)
	# export things at more aggregated level
	construct_and_export_sales_at_firm_level(firm_level_sales)
	construct_and_export_sales_at_country_level(firm_level_sales)


def import_and_clean_regional_sales_data():
	# The sales data was downloaded from Marklines region by region
	regions = ['Asia_Oceania', 'Americas', 'WesternEurope', 'CentralEasternEurope', 'Africa_ME']
	region_files = ['Asia+Oceania_allsales.xlsx', 'North+South America_allsales_20220121213311090_download.xlsx', 'Western_Europe_allsales.xlsx', 'Central+Eastern_Europe_allsales.xlsx', 'Africa+Middle_East_allsales.xlsx']
	firm_level = []
	# Import OEM dataframe
	oems = pd.read_csv(PATHS.marklines / 'OEMs.csv', dtype=object)
	# we will later on merge on names of the group so let's clean up
	oems['Level1_MLName'] = oems['Level1_MLName'].str.strip()
	# Loop through all the files
	# rf = region_files[0]
	# region = regions[0]
	for region, rf in zip(regions, region_files):
		print(region)
		xls = pd.ExcelFile(PATHS.marklines / 'Sales_data' / rf)
		for sheet in xls.sheet_names:
			sales_by_type_by_country_by_oem = cleaning_raw_sales_data_sheet(region, oems, sheet, rf)
			firm_level.append(sales_by_type_by_country_by_oem)
	firm_level_sales = pd.concat(firm_level)
	return firm_level_sales



def cleaning_raw_sales_data_sheet(region, oems, sheet, rf):
	sales = pd.read_excel(PATHS.marklines / 'Sales_data' / rf, sheet_name=sheet, header=1, dtype=object)
	if region != 'WesternEurope':
		sales = pd.melt(sales, id_vars=['Country', 'Group', 'Maker/Brand', 'Type', 'Segment', 'Model', 'PowerTrain'],
						value_vars=['2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011', '2012', '2013',
									'2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021'], var_name='year')
	else:
		sales = pd.melt(sales, id_vars=['Country', 'Group', 'Maker/Brand', 'Type', 'Segment', 'Model', 'PowerTrain'],
						value_vars=[2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017,
									2018, 2019, 2020, 2021], var_name='year')
		sales['year'] = sales['year'].astype(str)
	# Constructing a variable that indicates: HV, EV, or ICE
	sales['Type'] = sales['PowerTrain']
	sales.loc[sales['Type'].isin(['Mild HV', 'HV', 'HV/EV', 'HV/EV/PHV', 'HV/PHV', '48V Mild HV']), 'Type'] = 'HV'
	sales.loc[sales['Type'] == 'EV/FCV/PHV', 'Type'] = 'EV'
	sales.loc[sales['Type'].isna(), 'Type'] = 'ICE'
	sales.loc[sales['Type'] == 'DE', 'Type'] = 'ICE'
	# deal with missing values
	sales.loc[sales['value'] == '-', 'value'] = 0
	sales['value'] = sales['value'].astype(float)
	# For the few companies that are at the Maker/Brand level instead of group
	sales.loc[sales['Maker/Brand'] == 'Nissan', 'Group'] = 'Nissan'
	sales.loc[sales['Maker/Brand'].isin(['Renault', 'Renault-Samsung', 'AvtoVAZ', 'Dacia']), 'Group'] = 'Renault'
	sales = sales.rename(columns={'Group': 'Level1_MLName', 'year': 'Year'})
	# Merging with oems to get ids 
	sales = sales.merge(oems[['OEM_Level1_ID', 'Level1_MLName', 'Year']], on=['Level1_MLName', 'Year'], how='inner')
	sales = sales.drop_duplicates(subset=['Year', 'Country', 'Level1_MLName', 'Maker/Brand', 'Type', 'Segment', 'Model', 'value'])
	sales_by_type_by_country_by_oem = sales.groupby(['OEM_Level1_ID', 'Year', 'Type', 'Level1_MLName', 'Country'])['value'].sum().reset_index()
	sales_by_type_by_country_by_oem['Region'] = region
	return sales_by_type_by_country_by_oem


def construct_and_export_sales_at_firm_level(firm_level_sales):
	firm_level_sales_by_country = firm_level_sales.groupby(['OEM_Level1_ID', 'Year', 'Level1_MLName', 'Country'])['value'].sum().reset_index()
	firm_level_sales_by_country.to_csv(PATHS.dropbox / 'Data_outputted/A_AutoIndustry/sales_firm_year_country_level.csv', index=False)
	firm_level_sales_by_type = firm_level_sales.groupby(['OEM_Level1_ID', 'Year', 'Level1_MLName', 'Type'])['value'].sum().reset_index()
	firm_level_sales_by_type = pd.pivot(data=firm_level_sales_by_type, index=['OEM_Level1_ID', 'Level1_MLName', 'Year'], columns='Type', values='value').reset_index()
	firm_level_sales_by_type['HybridCar_Sales'] = firm_level_sales_by_type[['HV', 'PHV'] ].sum(axis=1, skipna=True)
	firm_level_sales_by_type['CleanCar_Sales'] = firm_level_sales_by_type[['HV', 'PHV', 'EV', 'FCV']].sum(axis=1, skipna=True)
	firm_level_sales_by_type.to_csv(PATHS.dropbox / 'Data_outputted/A_AutoIndustry/sales_firm_year_type_level.csv', index=False)


def construct_and_export_sales_at_country_level(firm_level_sales):
	country_level_sales_by_type = firm_level_sales.groupby(['Year', 'Country', 'Type'])['value'].sum().reset_index()
	country_level_sales_by_type = pd.pivot(data=country_level_sales_by_type, index=['Country', 'Year'], columns='Type', values='value').reset_index()
	country_level_sales_by_type['HybridCar_Sales'] = country_level_sales_by_type[['HV', 'PHV']].sum(axis=1, skipna=True)
	country_level_sales_by_type['CleanCar_Sales'] = country_level_sales_by_type[['HV', 'PHV', 'EV', 'FCV']].sum(axis=1, skipna=True)
	country_level_sales_by_type.to_csv(PATHS.dropbox / 'Data_outputted/A_AutoIndustry/sales_country_year_type_level.csv', index=False)
	
